# -*- coding=utf-8 -*-
import pytesseract
import requests
from PIL import Image
import subprocess

def getImg(url="http://kns.cnki.net/kns/checkcode.aspx?t=0.10158463785728089",
           save_path="img.jpg", timeout=10):
    """Download a captcha image, save it to disk and open it with Pillow.

    Args:
        url: Address the captcha image is fetched from.
        save_path: File the raw response body is written to.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the script indefinitely.

    Returns:
        The ``PIL.Image`` object obtained by opening ``save_path``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # The session is used as a context manager so its connections are
    # released once the download is finished.
    with requests.Session() as session:
        # NOTE(review): verify=False disables TLS certificate verification.
        # It is kept to preserve the existing behaviour; confirm it is
        # really wanted before pointing this at an https URL.
        response = session.get(url=url, verify=False, timeout=timeout)
        # Fail here instead of saving an error page as if it were an image.
        response.raise_for_status()
        payload = response.content

    # Save the captcha image locally.
    with open(save_path, 'wb') as f:
        f.write(payload)

    # Only opens the saved image with Pillow; automatic recognition of the
    # captcha is not implemented in this function.
    return Image.open(save_path)

# Download the captcha as soon as the module runs: getImg() writes img.jpg
# into the working directory. The returned image object is discarded here.
getImg()

def call_tesseract(in_file, expect_len=4):
    """Run the ``tesseract`` command-line tool on an image and return its text.

    tesseract is asked to write its result to the output base ``o.txt``;
    the resulting ``o.txt.txt`` file in the current working directory is
    read back and cleaned of surrounding whitespace and embedded spaces.

    Args:
        in_file: Path of the image file handed to tesseract.
        expect_len: Number of characters a valid result must have.

    Returns:
        The cleaned text when it is exactly ``expect_len`` characters long.
        An empty string when tesseract cannot be started, exits with a
        non-zero status, or produces text of a different length.
    """
    tesseract_exe_name = 'tesseract'
    # tesseract appends ".txt" to the output base name it is given, so the
    # text ends up in out_base + '.txt'.
    out_base = "o.txt"
    out_full = out_base + '.txt'

    args = [tesseract_exe_name, in_file, out_base]
    try:
        ret = subprocess.call(args)
    except OSError as err:
        # Raised when the executable is missing or cannot be run; report it
        # the same way as every other failure of this function.
        print("call tesseract failed:%s" % err)
        return ''
    if ret != 0:
        print("call tesseract failed:%d" % ret)
        return ''

    with open(out_full) as f:
        text = f.read()

    # strip() also removes the trailing form feed (page separator) that
    # newer tesseract versions append after the final newline; stripping
    # only '\r\n' would leave it in place and break the length check.
    text = text.strip().replace(" ", "")
    print("auto read rand_code:%s" % text)
    if len(text) != expect_len:
        print("auto read faild:%s, %d" % (text, len(text)))
        return ''
    return text

# Run OCR on the downloaded captcha and print the recognised text (an empty
# string when recognition fails). The single-argument call form behaves the
# same on Python 2 and is also valid syntax on Python 3.
print(call_tesseract('img.jpg'))